# Scrapy settings for website project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Project name
BOT_NAME = 'website'

SPIDER_MODULES = ['website.spiders']
NEWSPIDER_MODULE = 'website.spiders'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Maximum number of concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 32

# Delay (in seconds) between requests to the same website
DOWNLOAD_DELAY = 5

# Maximum concurrent requests per domain and per IP
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 8

# Disable cookies (enabled by default)
COOKIES_ENABLED = False

# Enabled spider middlewares
SPIDER_MIDDLEWARES = {
    'website.middlewares.WebsiteSpiderMiddleware': 543,
}

# Enabled downloader middlewares (a value of None disables that middleware)
DOWNLOADER_MIDDLEWARES = {
    'website.middlewares.WebsiteDownloaderMiddleware': None,
    'website.middlewares.FanUserAgentMiddleware': 100,  # sets the User-Agent
}

# Enabled item pipelines
ITEM_PIPELINES = {
    'website.pipelines.WebsitePipeline': 300,
}

# Redis connection settings.
# NOTE(review): host and password are hard-coded in source control — consider
# loading them from environment variables or a local config instead.
REDIS_HOST = "180.76.176.24"
# Fixed: this setting was misspelled as REDIS_POST, so consumers looking up
# the conventional REDIS_PORT name (e.g. scrapy-redis) would silently fall
# back to their default. The misspelled name is kept below for compatibility.
REDIS_PORT = 6379
REDIS_POST = REDIS_PORT  # deprecated misspelling, kept for backward compatibility
REDIS_DB_INDEX = 0
REDIS_PASSWORD = "qq4145246"

# Log only errors and above (not fully silent)
LOG_LEVEL = "ERROR"

# Use a custom CSV item exporter for the csv feed format
FEED_EXPORTERS = {
    'csv': 'website.to_csv.MyProjectCsvItemExporter',
}

# Column order for CSV export — presumably read by MyProjectCsvItemExporter
# (FIELDS_TO_EXPORT is not a built-in Scrapy setting); verify against to_csv.py.
FIELDS_TO_EXPORT = [
    'world_ranking',
    'university_name',
    'country',
    'url',
    'domain_name',
]